home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Disc to the Future 2
/
Disc to the Future Part II Programmer's Reference (Wayzata Technology)(6013)(1992).bin
/
MAC
/
MPW_TOOL
/
TOOLS
/
TOOLS_WI
/
BYACC__
/
YYLEX.C
< prev
Wrap
C/C++ Source or Header
|
1989-11-19
|
20KB
|
1,117 lines
#include <stdio.h>
#include "defs.h"
#include "dep.h"
#include "files.h"
#include "new.h"
#include "symtab.h"
#include "text.h"
#include "tokens.h"
/* Initial capacity, in characters, of the token buffer (see create_buffer). */
#define MAX_BUFFER 4096
/* Number of characters savec() adds to the capacity each time the buffer fills. */
#define BUFFER_DELTA 2048
/* Current input line number; defined elsewhere and advanced here on each NEWLINE. */
extern int lineno;
/* Value slot filled in by the scanning routines for the token they return. */
extern YYSTYPE yylval;
/* Storage holding the text of the token currently being collected. */
LOCAL char *buffer;
/* Number of characters currently stored in buffer. */
LOCAL int buffer_size;
/* Number of characters buffer can hold before savec() must enlarge it. */
LOCAL int max_buffer;
/* How many section marks special_symbol() has seen so far. */
LOCAL int mark_count;
/* Non-zero once yylex() is to answer END_OF_FILE on every further call. */
LOCAL int at_eof;
/*
 * Obtain the token buffer at its initial capacity of MAX_BUFFER
 * characters and mark it as holding no text.
 */
create_buffer()
{
    max_buffer = MAX_BUFFER;
    buffer_size = 0;
    buffer = NEW2(MAX_BUFFER, char);
}
/*
 * Forget any text collected so far.  Only the length is reset; the
 * storage itself is kept for reuse.
 */
clear_buffer()
{
    buffer_size = 0;
}
/*
 * Hand the token buffer to FREE.  The bookkeeping variables are left
 * as they were, so the buffer must not be used again until
 * create_buffer() has been called.
 */
free_buffer()
{
    FREE(buffer);
}
savec(c)
int c;
{
register char *s, *t;
register char *old_buffer;
register int i;
if (buffer_size == max_buffer)
{
max_buffer += BUFFER_DELTA;
old_buffer = t = buffer;
buffer = s = NEW2(max_buffer, char);
for (i = 0; i < buffer_size; i++)
*s++ = *t++;
FREE(old_buffer);
}
buffer[buffer_size] = c;
buffer_size++;
}
/*
 * Append every character of the NUL-terminated string s to the token
 * buffer; the terminating NUL itself is not stored.
 */
saves(s)
register char *s;
{
    for ( ; *s != 0; s++)
        savec(*s);
}
/*
 * Put the scanner into its starting state: line 1, no marks seen,
 * not at end of file, and a freshly created token buffer.
 */
initialize_lex()
{
    lineno = 1;
    mark_count = 0;
    at_eof = 0;
    create_buffer();
}
skip_white_space()
{
register int c;
register FILE *fp;
register int done;
register int inside;
register int start_line;
static char comment_msg[] = "unterminated comment";
fp = input_file;
c = getc(fp);
done = 0;
while ( ! done)
{
switch (c)
{
case SP: case BS: case HT: case VT:
case FF: case CR: case DEL:
c = getc(fp);
break;
case NEWLINE:
lineno++;
c = getc(fp);
break;
case '/':
c = getc(fp);
if (c != '*')
illegal_character('/');
start_line = lineno;
c = getc(fp);
inside = 1;
while (inside)
{
if (c == '*')
{
c = getc(fp);
if (c == '/')
inside = 0;
}
else if (c == NEWLINE)
{
lineno++;
c = getc(fp);
}
else if (c == EOF)
error(start_line, comment_msg);
else
c = getc(fp);
}
c = getc(fp);
break;
default:
done = 1;
}
}
ungetc(c, fp);
}
/*
 * Collect an identifier whose first character, c, has already been
 * read by the caller.  Letters, digits, '.' and '_' are appended to
 * the token buffer; the first character of any other kind ends the
 * identifier and is pushed back onto the input.  The value returned
 * by lookup() for the collected text is stored in yylval.bp.
 */
get_identifier(c)
register int c;
{
    register FILE *in;

    in = input_file;
    for (;;)
    {
        savec(c);
        c = getc(in);
        switch (c)
        {
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z':
        case '.': case '_':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            continue;           /* still part of the identifier */
        default:
            break;
        }
        break;                  /* c does not belong to the identifier */
    }

    yylval.bp = lookup(IDENTIFIER, buffer, buffer_size);
    ungetc(c, in);
}
int
get_string(c)
register int c;
{
register FILE *fp;
register int inside;
register int quote;
register int start_line;
register int n;
register int token;
static char string_msg[] = "unterminated string";
fp = input_file;
quote = c;
c = getc(fp);
start_line = lineno;
inside = 1;
while (inside)
{
switch (c)
{
case QUOTE:
case DOUBLE_QUOTE:
if (c == quote)
inside = 0;
else
{
savec(c);
c = getc(fp);
}
break;
case EOF:
case NEWLINE:
error(start_line, string_msg);
case BACKSLASH:
c = getc(fp);
switch (c)
{
case EOF:
error(start_line, string_msg);
case NEWLINE:
lineno++;
c = getc(fp);
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7':
n = NUMERIC_VALUE(c);
c = getc(fp);
if (IS_OCTAL(c))
{
n = 8*n + NUMERIC_VALUE(c);
c = getc(fp);
if (IS_OCTAL(c))
{
n = 8*n + NUMERIC_VALUE(c);
c = getc(fp);
}
}
if (c > MAXCHAR) error(lineno, "illegal character in string");
savec(n);
break;
case BACKSLASH:
case QUOTE:
case DOUBLE_QUOTE:
savec(c);
c = getc(fp);
break;
case 'b':
savec(BS);
c = getc(fp);
break;
case 't':
savec(HT);
c = getc(fp);
break;
case 'n':
savec(NEWLINE);
c = getc(fp);
break;
case 'v':
savec(VT);
c = getc(fp);
break;
case 'f':
savec(FF);
c = getc(fp);
break;
case 'r':
savec(CR);
c = getc(fp);
break;
default:
error(lineno, "illegal escape sequence");
}
break;
default:
savec(c);
c = getc(fp);
break;
}
}
if (buffer_size == 1)
{
if (buffer[0] == NUL) error(start_line, "null character not allowed");
yylval.bp = lookup(CHARACTER, buffer, buffer_size);
token = CHARACTER;
}
else
{
yylval.bp = lookup(STRING, buffer, buffer_size);
token = STRING;
}
return (token);
}
/*
 * Scan an unsigned decimal number whose first digit, c, has already
 * been read.  The value is accumulated digit by digit and stored in
 * yylval.i; the character that ends the number is pushed back.
 * Before each digit is added, the running value is checked so that
 * the result cannot exceed MAX_TOKEN_NUMBER; a number that would do so
 * is reported through error().
 */
get_number(c)
register int c;
{
    register FILE *in;
    register int value, digit;

    in = input_file;
    value = NUMERIC_VALUE(c);
    for (;;)
    {
        c = getc(in);
        switch (c)
        {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            digit = NUMERIC_VALUE(c);
            if (value > MAX_TOKEN_NUMBER/10 ||
                    (value == MAX_TOKEN_NUMBER/10 &&
                     digit > MAX_TOKEN_NUMBER%10))
                error(lineno, "number too large");
            value = 10*value + digit;
            continue;
        default:
            break;
        }
        break;                  /* c is not a digit: the number is over */
    }

    yylval.i = value;
    ungetc(c, in);
}
save_comment()
{
register int c;
register FILE *fp;
register int inside;
register int start_line;
static char comment_msg[] = "unterminated comment";
fp = input_file;
savec('/');
savec('*');
start_line = lineno;
c = getc(fp);
inside = 1;
while (inside)
{
switch (c)
{
case EOF:
error(start_line, comment_msg);
case '*':
savec(c);
c = getc(fp);
if (c == '/')
{
savec(c);
inside = 0;
}
break;
case NEWLINE:
lineno++;
savec(c);
c = getc(fp);
break;
default:
savec(c);
c = getc(fp);
break;
}
}
}
save_string(c)
register int c;
{
register FILE *fp;
register int quote;
register int inside;
register int start_line;
static char string_msg[] = "unterminated string";
fp = input_file;
quote = c;
savec(quote);
start_line = lineno;
c = getc(fp);
inside = 1;
while (inside)
{
switch (c)
{
case EOF:
case NEWLINE:
error(start_line, string_msg);
case QUOTE:
case DOUBLE_QUOTE:
savec(c);
if (c == quote)
inside = 0;
else
c = getc(fp);
break;
case BACKSLASH:
savec(c);
c = getc(fp);
if (c == EOF)
error(start_line, string_msg);
else if (c == NEWLINE)
lineno++;
savec(c);
c = getc(fp);
break;
default:
savec(c);
c = getc(fp);
break;
}
}
}
save_text()
{
register int c;
register FILE *fp;
register int inside;
register int start_line;
static char text_msg[] = "unterminated text";
fp = input_file;
start_line = lineno;
c = getc(fp);
inside = 1;
while (inside)
{
switch (c)
{
case EOF:
error(start_line, text_msg);
case '%':
c = getc(fp);
if (c == '}')
inside = 0;
else
savec('%');
break;
case BACKSLASH:
c = getc(fp);
if (c == '}')
inside = 0;
else
savec(BACKSLASH);
break;
case '/':
c = getc(fp);
if (c == '*')
{
save_comment();
c = getc(fp);
}
else
savec('/');
break;
case QUOTE:
case DOUBLE_QUOTE:
save_string(c);
c = getc(fp);
break;
case NEWLINE:
lineno++;
savec(c);
c = getc(fp);
break;
default:
savec(c);
c = getc(fp);
break;
}
}
}
/*
 * Compare two counted character sequences.
 *
 *   s, m   first sequence and its length
 *   t, n   second sequence and its length
 *
 * Returns 1 when the lengths are equal and the first n characters of
 * s and t are identical, 0 otherwise.  Neither sequence needs to be
 * NUL-terminated.
 */
int
match (s, m, t, n)
register char *s, *t;
int m, n;
{
    register int remaining;

    if (m != n)
        return (0);

    for (remaining = n; remaining > 0; remaining--)
    {
        if (*s++ != *t++)
            return (0);
    }
    return (1);
}
save_union_text()
{
register int c;
register FILE *fp;
register int done;
register int level;
register int start_line;
static char union_msg[] = "malformed union declaration";
fp = input_file;
start_line = lineno;
saves("typedef union");
c = getc(fp);
level = 0;
done = 0;
while ( ! done)
{
switch (c)
{
case EOF:
error(start_line, union_msg);
case '{':
level++;
savec(c);
c = getc(fp);
break;
case '}':
level--;
savec(c);
if (level == 0)
done = 1;
else
c = getc(fp);
break;
case '/':
c = getc(fp);
if (c == '*')
{
save_comment();
c = getc(fp);
}
else
savec('/');
break;
case QUOTE:
case DOUBLE_QUOTE:
save_string(c);
c = getc(fp);
break;
case NEWLINE:
lineno++;
savec(c);
c = getc(fp);
break;
default:
savec(c);
c = getc(fp);
break;
}
}
saves(" YYSTYPE;\n");
yylval.tp = mk_text(start_line, buffer, buffer_size);
}
/*
 * Scan the construct introduced by '%' or BACKSLASH.  c is that
 * introducing character, already read by the caller; the character
 * after it selects the token:
 *
 *   '{'          literal text, collected by save_text()      -> TEXT
 *   '%' or '\\'  section mark: MARK the first time; afterwards at_eof
 *                is set and END_OF_FILE is returned
 *   '<' '>'      LEFT, RIGHT
 *   '0' '2' '='  TOKEN, NONASSOC, PREC
 *   a letter     keyword, either all upper or all lower case:
 *                BINARY/NONASSOC, LEFT, PREC, RIGHT, START,
 *                TERM/TOKEN, TYPE, UNION (its body is collected by
 *                save_union_text() -> UNION_TEXT)
 *
 * An unrecognised keyword or a malformed symbol is reported through
 * error().  Returns the token code.
 *
 * The keyword is accumulated in the token buffer, which is expected
 * to be empty on entry (yylex() clears it before calling).
 */
int
special_symbol(c)
register int c;
{
    register FILE *in;
    register int token;
    register int first_line;
    register int strophe;
    static char malformed_msg1[] = "malformed %-symbol";
    static char malformed_msg2[] = "malformed \\-symbol";
    static char unknown_msg[] = "unknown keyword";

    in = input_file;
    strophe = c;
    c = getc(in);
    switch (c)
    {
    case '{':
        first_line = lineno;
        save_text();
        yylval.tp = mk_text(first_line, buffer, buffer_size);
        token = TEXT;
        break;

    case '%':
    case BACKSLASH:
        mark_count++;
        if (mark_count < 2)
            token = MARK;
        else
        {
            at_eof = 1;
            token = END_OF_FILE;
        }
        break;

    case '<':
        token = LEFT;
        break;

    case '>':
        token = RIGHT;
        break;

    case '0':
        token = TOKEN;
        break;

    case '2':
        token = NONASSOC;
        break;

    case '=':
        token = PREC;
        break;

    /* Every letter that can begin a keyword. */
    case 'B': case 'L': case 'N': case 'P': case 'R':
    case 'S': case 'T': case 'U':
    case 'b': case 'l': case 'n': case 'p': case 'r':
    case 's': case 't': case 'u':
        /* Gather the whole run of letters into the token buffer. */
        for (;;)
        {
            savec(c);
            c = getc(in);
            switch (c)
            {
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
                continue;
            default:
                break;
            }
            break;
        }
        ungetc(c, in);

        /*
         * match() compares whole words, so the keywords can be tried
         * one after another regardless of their first letter.
         */
        if (match(buffer, buffer_size, "BINARY", 6) ||
                match(buffer, buffer_size, "binary", 6) ||
                match(buffer, buffer_size, "NONASSOC", 8) ||
                match(buffer, buffer_size, "nonassoc", 8))
            token = NONASSOC;
        else if (match(buffer, buffer_size, "LEFT", 4) ||
                match(buffer, buffer_size, "left", 4))
            token = LEFT;
        else if (match(buffer, buffer_size, "PREC", 4) ||
                match(buffer, buffer_size, "prec", 4))
            token = PREC;
        else if (match(buffer, buffer_size, "RIGHT", 5) ||
                match(buffer, buffer_size, "right", 5))
            token = RIGHT;
        else if (match(buffer, buffer_size, "START", 5) ||
                match(buffer, buffer_size, "start", 5))
            token = START;
        else if (match(buffer, buffer_size, "TERM", 4) ||
                match(buffer, buffer_size, "TOKEN", 5) ||
                match(buffer, buffer_size, "term", 4) ||
                match(buffer, buffer_size, "token", 5))
            token = TOKEN;
        else if (match(buffer, buffer_size, "TYPE", 4) ||
                match(buffer, buffer_size, "type", 4))
            token = TYPE;
        else if (match(buffer, buffer_size, "UNION", 5) ||
                match(buffer, buffer_size, "union", 5))
        {
            /* Drop the keyword itself; the buffer now takes the union text. */
            clear_buffer();
            save_union_text();
            token = UNION_TEXT;
        }
        else
            error(lineno, unknown_msg);
        break;

    /* Letters that begin no keyword at all. */
    case 'A': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'O': case 'Q': case 'V': case 'W':
    case 'X': case 'Y': case 'Z':
    case 'a': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k':
    case 'm': case 'o': case 'q': case 'v': case 'w':
    case 'x': case 'y': case 'z':
        error(lineno, unknown_msg);
        /* FALLTHROUGH */

    default:
        if (strophe == '%')
            error(lineno, malformed_msg1);
        else
            error(lineno, malformed_msg2);
    }

    return (token);
}
get_action()
{
register int c;
register FILE *fp;
register int level;
register int start_line;
static char action_msg[] = "unterminated action";
fp = input_file;
start_line = lineno;
saves("\t\t\t{");
c = getc(fp);
level = 1;
while (level > 0)
{
switch (c)
{
case EOF:
error(start_line, action_msg);
case '{':
level++;
savec(c);
c = getc(fp);
break;
case '}':
level--;
savec(c);
if (level > 0)
c = getc(fp);
break;
case '/':
c = getc(fp);
if (c == '*')
{
save_comment();
c = getc(fp);
}
else
savec('/');
break;
case QUOTE:
case DOUBLE_QUOTE:
save_string(c);
c = getc(fp);
break;
case NEWLINE:
lineno++;
savec(c);
c = getc(fp);
break;
default:
savec(c);
c = getc(fp);
break;
}
}
yylval.tp = mk_text(start_line, buffer, buffer_size);
}
get_old_fashioned_action()
{
register int c;
register FILE *fp;
register int inside;
register int start_line;
static char action_msg[] = "unterminated action";
fp = input_file;
start_line = lineno;
skip_white_space();
c = getc(fp);
if (c == '{')
get_action();
else
{
saves("\t\t\t");
inside = 1;
while (inside)
{
switch (c)
{
case EOF:
error(start_line, action_msg);
case ';':
savec(c);
inside = 0;
break;
case '/':
c = getc(fp);
if (c == '*')
{
save_comment();
c = getc(fp);
}
else
savec('/');
break;
case QUOTE:
case DOUBLE_QUOTE:
save_string(c);
c = getc(fp);
break;
case NEWLINE:
lineno++;
savec(c);
c = getc(fp);
break;
default:
savec(c);
c = getc(fp);
break;
}
}
yylval.tp = mk_text(start_line, buffer, buffer_size);
}
}
get_type_identifier()
{
register int c;
register FILE *fp;
register int inside;
register int start_line;
static char type_id_msg[] = "malformed type-identifier";
fp = input_file;
start_line = lineno;
skip_white_space();
c = getc(fp);
switch (c)
{
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
case '_':
break;
default:
error(start_line, type_id_msg);
}
inside = 1;
while (inside)
{
savec(c);
c = getc(fp);
switch (c)
{
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
case '_':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
break;
default:
inside = 0;
}
}
ungetc(c, fp);
skip_white_space();
c = getc(fp);
if (c != '>') error(start_line, type_id_msg);
yylval.bp = lookup(TYPE_IDENTIFIER, buffer, buffer_size);
}
/*
 * Return the next token from input_file.
 *
 * The token buffer is cleared on every call.  Once at_eof has been
 * set (by real end of input here, or by special_symbol() on the second
 * section mark) every call returns END_OF_FILE without reading.
 * Otherwise white space and comments are skipped and the next
 * character selects the scanning routine:
 *
 *   letter, '.', '_'   get_identifier()            -> IDENTIFIER
 *   quote              get_string()                -> CHARACTER or STRING
 *   digit              get_number()                -> NUMBER
 *   '%' or BACKSLASH   special_symbol()            -> whatever it returns
 *   '{'                get_action()                -> ACTION
 *   '='                get_old_fashioned_action()  -> ACTION
 *   ',' ':' ';' '|'    COMMA, COLON, SEMICOLON, BAR
 *   '<'                get_type_identifier()       -> TYPE_IDENTIFIER
 *
 * Any other character is passed to illegal_character().
 * NOTE(review): on that path token is never assigned, so the code
 * relies on illegal_character() not returning -- confirm.
 */
int
yylex()
{
    register int c;
    register FILE *fp;
    register int token;

    /* One clear is enough: nothing writes the buffer before scanning starts. */
    clear_buffer();
    if (at_eof)
        return (END_OF_FILE);

    fp = input_file;
    skip_white_space();
    c = getc(fp);
    switch (c)
    {
    case EOF:
        at_eof = 1;
        token = END_OF_FILE;
        break;

    case 'A': case 'B': case 'C': case 'D': case 'E':
    case 'F': case 'G': case 'H': case 'I': case 'J':
    case 'K': case 'L': case 'M': case 'N': case 'O':
    case 'P': case 'Q': case 'R': case 'S': case 'T':
    case 'U': case 'V': case 'W': case 'X': case 'Y':
    case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e':
    case 'f': case 'g': case 'h': case 'i': case 'j':
    case 'k': case 'l': case 'm': case 'n': case 'o':
    case 'p': case 'q': case 'r': case 's': case 't':
    case 'u': case 'v': case 'w': case 'x': case 'y':
    case 'z':
    case '.': case '_':
        get_identifier(c);
        token = IDENTIFIER;
        break;

    case QUOTE:
    case DOUBLE_QUOTE:
        token = get_string(c);
        break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        get_number(c);
        token = NUMBER;
        break;

    case '%':
    case BACKSLASH:
        token = special_symbol(c);
        break;

    case '{':
        get_action();
        token = ACTION;
        break;

    case '=':
        get_old_fashioned_action();
        token = ACTION;
        break;

    case ',':
        token = COMMA;
        break;

    case ':':
        token = COLON;
        break;

    case ';':
        token = SEMICOLON;
        break;

    case '|':
        token = BAR;
        break;

    case '<':
        get_type_identifier();
        token = TYPE_IDENTIFIER;
        break;

    default:
        illegal_character(c);
    }

    return (token);
}